/************************************************************************
Purpose: 	Create a plant-day NeML trading-inventory data set.

Notes:      The final product is a plant-day NeML trading-inventory data set 
			(N=10,702) with following columns:
			* gpcb_id: plant unique id
			* commodity: commodity unique id
			* date: date
			* bid_id: bid unique id
			* bid_status: bid status
				* FILL: filled
				* PFILL: partially filled
				* CNL/CNLS: canceled
				* REJ: rejected
			* bid_price: bid price (Rs/kg)
			* bid_qty: bid quantity (kg)
			* bid_value = bid_price * bid_qty (Rs)
			* trade_price: trade price (Rs/kg)
			* trade_qty: trade quantity (kg)
			* trade_qty_cum: cumulative trade quantity (kg)
			* trade_value = trade_price * trade_qty (Rs)
			* allocated_quantity: initial permit allocation (kg)
			* permit_holding = allocated_quantity + trade_qty_cum (kg)
			* consumed: validated emissions in a period (kg)
			* current_inventory: permits left by the end of a period (kg)
			* sold: permits sold in a period (kg)
			* purchase: permits purchased in a period (kg)
*************************************************************************/

set more off
clear all
pause on

********************************************************************************
*** Clean NeML order-trade record
********************************************************************************

import delimited "$TRADING_DATA_IN/NeMLordertrade_Sept2019-Mar2021.csv", clear
tostring bid_id, replace format(%17.0g)

gen mydate = date(date,"YMD") 
sort mydate gpcb_id
drop date 
rename mydate date
format date %tddd-Mon-CCYY

********************************************************************************
*** Merge plant-period NeML Inventory data set
********************************************************************************

drop if gpcb_id == 158777056
destring gpcb_id, replace
merge m:1 gpcb_id commodity using "$TRADING_DATA_OUT/inventory_plant-commodity.dta"
* Dropping 4 observations from GPCBID 21320
drop if _merge == 1
drop _merge

rename status bid_status
sort gpcb_id commodity_period date bid_id
bysort gpcb_id commodity_period: gen trade_qty_cum = sum(trade_qty)
gen permit_holding = allocated_quantity + trade_qty_cum

sort gpcb_id commodity_period date bid_id
order gpcb_id commodity commodity_period date bid_id bid_status bid_price bid_qty bid_value trade_price trade_qty trade_qty_cum trade_value permit_holding allocated_quantity consumed_period final_inventory_period

* 10,702 (plant, commodity, date, bid) observations

label var gpcb_id "Plant GPCB ID"
label var commodity "Commodity ID"
label var date "Bid date"
label var bid_id "Bid ID"
label var bid_status "Bid status (REJ=rejected, CNLS=cancelled, FILL=filled)"
label var bid_price "Bid price (Rs/kg)"
label var bid_qty "Bid quantity (kg)"
label var bid_value "Bid value (Rs) = bid_qty * bid_price"
label var trade_price "Trade price (Rs/kg)"
label var trade_qty "Trade quantity (kg)"
label var trade_qty_cum "Trade quantity cumulative (kg)"
label var trade_value "Trade value (Rs) = trade_qty * trade_price"
label var permit_holding "Permit holdings (kg) = initial allocation + cumulative trade quantity " 

save "$TRADING_DATA_OUT/record_plant-commodity-date-bid.dta", replace

********************************************************************************
*** Check inconsistency
********************************************************************************

use "$TRADING_DATA_OUT/record_plant-commodity-date-bid.dta", clear

** 1) Check whether trade quantity = order quantity for full quantity order (FILL)
// br if trade_qty != bid_qty & bid_status == "FILL"

* 7 inconsistent records

// gpcb_id	commodity	bid_id
// 20579	PSUM010220	10661117340004
// 20820	PSUM010220	10661147590015
// 21184	PSUM161019	13171414020014
// 21335	PSUM010220	10661114390002
// 21387	PSUM010220	10661119540005
// 21486	PSUM010220	10661100410001
// 27688	PSUM010220	10661117320003

** 2) Check whether trade data and inventory data matches

preserve

replace trade_qty = 0 if missing(trade_qty)

* Create total purchase based on trade record
bysort gpcb_id commodity_period: egen total_purchased = sum(trade_qty) if trade_qty > 0
bysort gpcb_id commodity_period: egen my_purchased = mean(total_purchased)
drop total_purchased
replace my_purchased = 0 if missing(my_purchased)

* Create total sale based on trade record
bysort gpcb_id commodity_period: egen total_sold = sum(trade_qty) if trade_qty < 0
bysort gpcb_id commodity_period: egen my_sold = mean(total_sold)
drop total_sold
replace my_sold = 0 if missing(my_sold)
replace my_sold = -my_sold

* Generate inventory using purchase and sale from trade record
gen my_final_inventory = allocated_quantity + my_purchased - my_sold - consumed

* Collapse to plant-period level
keep gpcb_id commodity commodity_period purchased_period my_purchased sold_period my_sold final_inventory_period my_final_inventory
order gpcb_id commodity commodity_period purchased_period my_purchased sold_period my_sold final_inventory_period my_final_inventory
duplicates drop

* Check if purchase/sale/inventory match
sort commodity_period gpcb_id
keep if (purchased_period != my_purchased) | (sold_period != my_sold) | (final_inventory_period != my_final_inventory)
* 225 / 1560 plant-commodity observations are inconsistent.

rename commodity_period period
tab period
//      period |      Freq.     Percent        Cum.
// ------------+-----------------------------------
//           1 |          5        2.22        2.22
//           2 |         13        5.78        8.00
//           3 |         32       14.22       22.22
//           4 |          5        2.22       24.44
//           5 |         12        5.33       29.78
//           6 |        138       61.33       91.11
//           7 |          5        2.22       93.33
//           8 |          5        2.22       95.56
//           9 |          5        2.22       97.78
//          10 |          5        2.22      100.00
// ------------+-----------------------------------
//       Total |        225      100.00

drop if period == 6
tab gpcb_id
//     gpcb_id |      Freq.     Percent        Cum.
// ------------+-----------------------------------
//       13916 |          9       10.34       10.34
//       20488 |          1        1.15       11.49
//       20490 |          2        2.30       13.79
//       20525 |          1        1.15       14.94
//       20546 |          1        1.15       16.09
//       20579 |          1        1.15       17.24
//       20583 |          1        1.15       18.39
//       20623 |          2        2.30       20.69
//       20632 |          9       10.34       31.03
//       20633 |          9       10.34       41.38
//       20635 |          9       10.34       51.72
//       20685 |          9       10.34       62.07
//       20765 |          1        1.15       63.22
//       20820 |          1        1.15       64.37
//       20825 |          1        1.15       65.52
//       20826 |          1        1.15       66.67
//       20848 |          1        1.15       67.82
//       20869 |          1        1.15       68.97
//       20870 |          1        1.15       70.11
//       20880 |          2        2.30       72.41
//       20889 |          1        1.15       73.56
//       20935 |          1        1.15       74.71
//       21025 |          1        1.15       75.86
//       21033 |          1        1.15       77.01
//       21038 |          1        1.15       78.16
//       21122 |          1        1.15       79.31
//       21203 |          1        1.15       80.46
//       21260 |          1        1.15       81.61
//       21310 |          1        1.15       82.76
//       21334 |          2        2.30       85.06
//       21335 |          1        1.15       86.21
//       21387 |          1        1.15       87.36
//       21446 |          1        1.15       88.51
//       21457 |          1        1.15       89.66
//       21479 |          1        1.15       90.80
//       21486 |          2        2.30       93.10
//       21520 |          1        1.15       94.25
//       24939 |          1        1.15       95.40
//       27688 |          1        1.15       96.55
//       28161 |          1        1.15       97.70
//       32072 |          1        1.15       98.85
//       33830 |          1        1.15      100.00
// ------------+-----------------------------------
//       Total |         87      100.00

restore
